
/*

________________________________________________________________________________
Descriptive statistics include the following items

_________
Main text

Table 1: Descriptive statistics, individuals
	
Table 2: Descriptive statistics, new incorporated businesses

_________
Appendix

Table A1: Two most frequent industries of new incorporated businesses by individual and parental income
	
Figure A1: New entrepreneurs over the sample years
	
Figure A5: Industry composition of new incorporated businesses

*/









/*
________________________________________________________________________________
Main text
*/

/*
________________________________________________________________________________
Table 1: Descriptive statistics, individuals
*/

use "$data\01_main\folk_flown_merge_eventready.dta", clear

********************************************************************************
/*
At t-1

Builds the t-1 panel of Table 1: one column of means per sample (all, wage
earners, all entrepreneurs, unincorporated, incorporated) and one row per
characteristic. The table is written into the first 17 observations of the
data in memory (rows are addressed through nro), so the sample must contain
at least 17 observations for the table to be complete.

Fix relative to the earlier version: the rows "Married" (nro 16) and
"Number of children" (nro 17) were filled from column 11 of r(StatTotal),
i.e. the mean of pks. They now use columns 12 (married) and 13 (lkm_k).
*/

keep if event_ie==-1 | event_uie==-1 | event_we==-1
keep shnro-vuosi dspi-has_d_are sukup-lkm_k ika event* ptile_prnt prnt_entr

* Education dummies built from highed (codes 1 or 2, and code 2 only)
gen edu4=0
replace edu4=1 if highed==1 | highed==2
gen edu6=0
replace edu6=1 if highed==2

* Variables summarised, in table row order (rows 5 to 17)
local statvars dspi ptile_dspi mkti ptile_mkti ptile_prnt ika prnt_entr sukup edu4 edu6 pks married lkm_k

* Row labels: observation count in row 3, one label per statvar from row 5 on
gen nro=_n
gen var=""
replace var="Observations" if nro==3
local row = 5
foreach lab in "Disposable income" "Disposable income rank" "Market income" "Market income rank" "Parental income rank" "Age" "Parent entrepreneur" "Female" "Edu above 4" "Edu above 6" "Capital region" "Married" "Number of children" {
	replace var="`lab'" if nro==`row'
	local ++row
}

* Sample definition behind each table column
local if_all   "1"
local if_w     "event_we==-1"
local if_all_e "event_ie==-1 | event_uie==-1"
local if_uie   "event_uie==-1"
local if_ie    "event_ie==-1"

* Create the columns first so that their order (all w all_e uie ie) is fixed
foreach col in all w all_e uie ie {
	gen `col'=.
}

tempname means
foreach col in all w all_e uie ie {
	count if `if_`col''
	replace `col'=r(N) if nro==3
	tabstat `statvars' if `if_`col'', stat(mean) save
	* Copy the results at once; column j of the matrix is the j-th statvar
	matrix `means' = r(StatTotal)
	forvalues j = 1/13 {
		replace `col' = `means'[1,`j'] if nro==`j'+4
	}
}

* Facelift
preserve // the top section
format all w all_e uie ie %10.0f
keep if nro<=10
keep var-ie
save "$results\02_descriptives\tab1_tmin1_top.dta", replace
restore
preserve // the bottom section
format all w all_e uie ie %10.2f
keep if nro>=11 & nro<=17
keep var-ie
save "$results\02_descriptives\tab1_tmin1_bottom.dta", replace
restore

********************************************************************************
/*
Entire sample (not the full population but those we follow from -5 to +10)
*/

* Builds the full-sample panel of Table 1: one column of means per sample
* (all, wage earners, all entrepreneurs, unincorporated, incorporated) and one
* row per characteristic. The table is written into the first 17 observations
* of the data in memory (rows are addressed through nro).
*
* Fix relative to the earlier version: the rows "Married" (nro 16) and
* "Number of children" (nro 17) were filled from column 11 of r(StatTotal),
* i.e. the mean of pks. They now use columns 12 (married) and 13 (lkm_k).

use "$data\01_main\folk_flown_merge_eventready.dta", clear
keep shnro-vuosi dspi-has_d_are sukup-lkm_k ika event* ptile_prnt prnt_entr

* Education dummies built from highed (codes 1 or 2, and code 2 only)
gen edu4=0
replace edu4=1 if highed==1 | highed==2
gen edu6=0
replace edu6=1 if highed==2

* Variables summarised, in table row order (rows 5 to 17)
local statvars dspi ptile_dspi mkti ptile_mkti ptile_prnt ika prnt_entr sukup edu4 edu6 pks married lkm_k

* Row labels: observation count in row 3, one label per statvar from row 5 on
gen nro=_n
gen var=""
replace var="Observations" if nro==3
local row = 5
foreach lab in "Disposable income" "Disposable income rank" "Market income" "Market income rank" "Parental income rank" "Age" "Parent entrepreneur" "Female" "Edu above 4" "Edu above 6" "Capital region" "Married" "Number of children" {
	replace var="`lab'" if nro==`row'
	local ++row
}

* Sample definition behind each table column: any non-missing event time
local if_all   "1"
local if_w     "event_we!=."
local if_all_e "event_ie!=. | event_uie!=."
local if_uie   "event_uie!=."
local if_ie    "event_ie!=."

* Create the columns first so that their order (all w all_e uie ie) is fixed
foreach col in all w all_e uie ie {
	gen `col'=.
}

tempname means
foreach col in all w all_e uie ie {
	count if `if_`col''
	replace `col'=r(N) if nro==3
	tabstat `statvars' if `if_`col'', stat(mean) save
	* Copy the results at once; column j of the matrix is the j-th statvar
	matrix `means' = r(StatTotal)
	forvalues j = 1/13 {
		replace `col' = `means'[1,`j'] if nro==`j'+4
	}
}

* Facelift
preserve // the top section
format all w all_e uie ie %10.0f
keep if nro<=10
keep var-ie
save "$results\02_descriptives\tab1_full_top.dta", replace
restore
preserve // the bottom section
format all w all_e uie ie %10.2f
keep if nro>=11 & nro<=17
keep var-ie
save "$results\02_descriptives\tab1_full_bottom.dta", replace
restore

********************************************************************************
/*
Resulting table before final formatting
*/

* Print the four saved pieces of Table 1 to the log, full sample first and
* then t-1, top section before bottom section. The last piece stays in memory.
foreach piece in full_top full_bottom tmin1_top tmin1_bottom {
	use "$results\02_descriptives\tab1_`piece'.dta", clear
	list
}

/*
________________________________________________________________________________
Table 2: Descriptive statistics, new incorporated businesses
*/

* Load event years 0 to 10 of incorporated entrepreneurs and attach the
* firm-level variables; only matched observations are kept.
use if inrange(event_ie,0,10) using "$data\01_main\folk_flown_merge_eventready.dta", clear
sort shnro event_ie
merge 1:1 shnro vuosi using "$data\01_main\folk_flown_merge_eventready_iespec.dta", keep(3)
drop _merge

* Keep only individuals observed in all 11 event years (0 through 10)
bysort shnro: gen nobs = _N
keep if nobs==11
drop nobs

* Deflate: divide the monetary variables by the year's index value.
* The list runs from 1998 to 2019; 2005 has index 1 and is left unchanged.
local yr = 1998
foreach idx in 0.8960 0.9070 0.9340 0.9590 0.9780 0.9910 0.9920 1.0000 1.0130 1.0290 1.0690 1.0870 1.1050 1.1420 1.1780 1.2037 1.2183 1.2164 1.2211 1.2313 1.2457 1.2599 {
	foreach v of varlist sales stfc va prod {
		replace `v' = `v' / `idx' if vuosi==`yr'
	}
	local ++yr
}

* The table
* Rows 1 to 5 of the data in memory hold the statistics; the row order matches
* the stat() list passed to tabstat below (mean median p10 p90 p99).

gen stat=""
local r = 1
foreach s in mean p50 p10 p90 p99 {
	replace stat="`s'" if _n==`r'
	local ++r
}

* Result columns, created in the order relied on by "keep stat-prod_10"
foreach h in 0 10 {
	foreach v in sales empl stfc va prod {
		gen `v'_`h' = .
	}
}

tempname S
foreach h in 0 10 {
	* Rescale and subset on a throw-away copy of the data
	preserve
	foreach v of varlist sales stfc - prod {
		replace `v' = `v' / 1000
	}
	keep if event_ie==`h'
	tabstat sales empl stfc va prod, stat(mean median p10 p90 p99) save
	matrix `S' = r(StatTotal)
	restore

	* Row i = statistic i, column k = k-th variable of the tabstat call
	forvalues i = 1/5 {
		local k = 1
		foreach v in sales empl stfc va prod {
			replace `v'_`h' = `S'[`i',`k'] if _n==`i'
			local ++k
		}
	}
}

keep stat-prod_10
drop if stat==""
save "$results\02_descriptives\tab2.dta", replace









/*
________________________________________________________________________________
Appendix
*/

/*
________________________________________________________________________________
Table A1: Two most frequent industries of new incorporated businesses by individual and parental income
*/

* Load the entry year (event time 0) of incorporated entrepreneurs and attach
* the industry of the firm; only matched observations are kept.
use if event_ie==0 using "$data\01_main\folk_flown_merge_eventready.dta", clear
merge 1:1 shnro vuosi using "$data\01_main\folk_flown_merge_eventready_iespec.dta", keepusing(firm_ind) keep(3)
drop _merge

* Define quintiles
* egen cut() returns the lower bound of each interval (1, 21, 41, 61, 81) and
* missing for values outside [1,101); the bounds are mapped to ids 1 to 5.
* NOTE(review): this presumes the rank variables run from 1 to 100 - confirm.
egen cut_prnt = cut(ptile_prnt), at(1(20)101)
egen cut_own = cut(ptile_dspi_tmin1), at(1(20)101)
gen qtile_own = (cut_own - 1)/20 + 1
gen qtile_prnt = (cut_prnt - 1)/20 + 1

/*
Two most frequent industries by quintile-quintile bins

The quintile ids are carried through the whole computation instead of being
rebuilt from row positions at the end. The earlier row-position approach
required exactly two industries in each of the 25 bins and relied on the
order of observations after bysort, which Stata does not guarantee for ties.
*/
keep firm_ind qtile_own qtile_prnt
drop if missing(firm_ind, qtile_own, qtile_prnt)

* Number of new businesses per industry within each bin, and the bin total
gen obs=1
collapse (count) obs, by(qtile_own qtile_prnt firm_ind)
bysort qtile_own qtile_prnt: egen ttl_obs = total(obs)
gen shr_obs = (obs / ttl_obs)*100

* Rank industries within a bin by descending frequency; ties are broken by
* the industry code so that the result is reproducible across runs
gen neg_obs = -obs
bysort qtile_own qtile_prnt (neg_obs firm_ind): gen nr = _n
drop neg_obs
keep if nr<=2

* Save the second most frequent
tempfile freq2
preserve
keep if nr==2
drop nr
rename (firm_ind obs ttl_obs shr_obs) (firm_ind_2 obs_2 ttl_obs_2 shr_obs_2)
save `freq2'
restore

* Most frequent industry, with the second most frequent placed alongside.
* Bins with a single industry keep missing values in the *_2 columns.
keep if nr==1
drop nr
rename (firm_ind obs ttl_obs shr_obs) (firm_ind_1 obs_1 ttl_obs_1 shr_obs_1)
merge 1:1 qtile_own qtile_prnt using `freq2', nogenerate
order qtile_own qtile_prnt firm_ind_1 obs_1 ttl_obs_1 shr_obs_1 firm_ind_2 obs_2 ttl_obs_2 shr_obs_2
sort qtile_own qtile_prnt
save "$results\02_descriptives\tabA1.dta", replace

/*
________________________________________________________________________________
Figure A1: New entrepreneurs over the sample years
*/

* Yearly share of entrants (event time 0) for incorporated entrepreneurs,
* unincorporated entrepreneurs and wage earners (labelled pseudo-starters).
* Intermediate counts go to temporary files rather than to scratch files in
* the results folder, so no existing file there can be overwritten or erased.
use vuosi event_ie event_uie event_we using "$data\01_main\folk_flown_merge_eventready.dta", clear

tempfile cnt_uie cnt_we

preserve // unincorporated
keep if event_uie==0
collapse (count) nobs_uie = event_uie, by(vuosi)
save `cnt_uie'
restore

preserve // wage earners
keep if event_we==0
collapse (count) nobs_we = event_we, by(vuosi)
save `cnt_we'
restore

keep if event_ie==0 // incorporated
collapse (count) nobs_ie = event_ie, by(vuosi)

* Years present in only one of the groups are kept, with missing counts
* (and hence missing shares) for the other groups
merge 1:1 vuosi using `cnt_uie', nogenerate
merge 1:1 vuosi using `cnt_we', nogenerate
rename vuosi year
sort year

* Share of each year in the group's total number of entrants, in percent
foreach x in ie uie we {
	egen ttl_`x' = total(nobs_`x')
	gen shr_`x' = (nobs_`x' / ttl_`x') * 100
}

label var shr_ie "Share of new incorporated entrepreneurs per year"
label var shr_uie "Share of new unincorporated entrepreneurs per year"
label var shr_we "Share of new pseudo-starters per year"

keep year shr*
save "$results\02_descriptives\figA1.dta", replace

/*
________________________________________________________________________________
Figure A5: Industry composition of new incorporated businesses
*/

* Industry shares among new incorporated entrepreneurs (event time 0)
use event_ie tol2dig if event_ie==0 using "$data\01_main\folk_flown_merge_eventready.dta", clear

* Entrants per 2-digit industry
bysort tol2dig: gen nobs_ind = _N

* Total number of entrants; computed before any industry is dropped, so the
* shares below are relative to all industries
gen count=1
egen ttl = total(count)
drop count

gen shr_ind = (nobs_ind / ttl) * 100

* Leave out the listed industry codes
keep if !inlist(tol2dig,0,1,3,5,7,9,11,14,15,17,19,21,24,34,36,37,39,40,51,53,65,67,84,91,92,94,97,99,100)

* One row per industry
bysort tol2dig: keep if _n==1
drop event_ie

rename tol2dig code_ind
label var code_ind "2-digit industry code (NACE)"
label var ttl "Total number of new incorporated entrepreneurs"
label var nobs_ind "Number of new incorporated entrepreneurs per industry"
label var shr_ind "Share of new incorporated entrepreneurs per industry"

save "$results\02_descriptives\figA5.dta", replace
